#!/bin/bash
#source util.sh

# Get the question IDs of pages accessed via Baidu.
#######################################
# Extract question ids from log lines whose referrer field mentions "baidu".
#
# A line is kept when awk field 4 contains "/ask/detail/" and field 5
# contains "baidu". Field 4 is then split on "/"; when that yields at most
# four parts and the fourth part is non-empty, the fourth part and field 5
# are written as one output line.
#
# Arguments:
#   $1 - path of the log file to read
#   $2 - path of the output file (truncated and overwritten)
# Returns:
#   exit status of awk (or of the failed redirection)
#######################################
function filter_baidu_refer()
{
    # Both paths are quoted so names containing whitespace or glob
    # characters are passed through as a single word; unquoted, the
    # redirection target would fail with "ambiguous redirect".
    awk '
    {
        if (index($4, "/ask/detail/")!=0 && index($5, "baidu") !=0) {
            # "/ask/detail/<id>" splits into "", "ask", "detail", "<id>".
            token_num = split($4, tokens, "/");
            if (token_num<=4 && length(tokens[4]) > 0) {
                print tokens[4], $5;
            }
        }
    }' "$1" > "$2"
}

# Driver: for every entry in the raw log directory, write the Baidu-referred
# question ids to dst_dir, then write a per-id occurrence count to count_dir.
original_log_dir="/data/link_analyse/web_log"
dst_dir="/data/link_analyse/filter_log"
count_dir="/data/link_analyse/access_count"
file_num=0
# Iterate with a glob rather than parsing `ls`, so that file names containing
# whitespace or glob characters are handled as single entries.
for log_path in "$original_log_dir"/*
do
    # An unmatched glob is left as the literal pattern; skip it so an empty
    # or missing directory results in zero iterations.
    [[ -e "$log_path" ]] || continue
    file=${log_path##*/}
    filter_baidu_refer "$log_path" "$dst_dir/$file"
    # Field 1 of the filtered file is the question id; uniq -c needs sorted input.
    awk '{print $1}' "$dst_dir/$file" | sort -k1 | uniq -c > "$count_dir/${file}_count"
    ((++file_num))
    # printf with %s prints the name verbatim (no backslash interpretation,
    # no word splitting), unlike `echo -e` with unquoted expansions.
    printf 'It is the %sth file. The file name is %s.\n' "$file_num" "$file"
done

